# This script will demonstrate how to analyze the ALPINE data collected as 
# part of the LongTerm Ecological Monitoring Initiative

# Notice that this example only has 1 years of data. We simulate multiple years to illustrate
# how to do a trend analysis.
#
#
# Only one study area at time can only be analyzed with a script. 
#
# This was programmed by Carl James Schwarz, Statistics and Actuarial Science, SFU
# cschwarz@stat.sfu.ca
#
# 2017-02-28 First Edition

# Summary of Protocol
#     “Permanent transects are established from just below present treeline 
#     to just above the transition from vegetation to rock (or ridgeline, whichever comes first).
#     A 50cm X 50cm quadrat is sampled at regular intervals along the transect.
#
#     The % foliar cover by species is recorded in each quadrat.”


# load libraries
library(betapart)  # for partitioning beta diversity
library(car)       # for testing for autocorrelation (2 libraries needed - see dwtest)
library(ggfortify) # for residual and other diagnostic plot
## Loading required package: ggplot2
library(ggplot2)   # for plotting
library(lmtest)    # for testing for autocorrelation
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(plyr)      # for group processing
library(readxl)    # for opening the Excel spreadsheets and reading off them
library(reshape2)  # for melting and casting
library(lmerTest)  # for the linear mixed modelling
## Loading required package: Matrix
## Loading required package: lme4
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(stringr)   # string handling (like case conversion)
library(vegan)     # for ordination methods (in general)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.4-2
# Load some common functions
source("../CommonFiles/common.functions.R")


cat("\n\n ***** Vegetation Analysis *****  \n\n")
## 
## 
##  ***** Vegetation Analysis *****
# get the data from the Excel work.books.
# we put the list of work books here, including the file type (xls or xlsx).

work.books.csv <- textConnection(
"file.name
General Survey Babine alpine_2013.xlsm
")

work.books <- read.csv(work.books.csv, as.is=TRUE, strip.white=TRUE, header=TRUE)
cat("File names with the data \n")
## File names with the data
work.books
##                                file.name
## 1 General Survey Babine alpine_2013.xlsm
# read each workbook and put all of the data together into one big data frame
veg.df <- plyr::ddply(work.books, "file.name", function(x){
  file.name <- file.path("Data", x$file.name)
  data <- readxl::read_excel(file.name, sheet="General Survey")
  data
})


# I will add some "fake data" based on the above data by randomly sampling from the above 
# and changing the dates to 2014 and 2015
set.seed(234234)
original.veg.df <- veg.df
veg.2013 <- original.veg.df[ sample(1:nrow(veg.df), nrow(veg.df), replace=TRUE),]
veg.2013$Date <- as.Date( paste("2013-", format(veg.df$Date, "%m-%d")))
veg.2017 <- veg.df[ sample(1:nrow(veg.df), nrow(veg.df), replace=TRUE),]
veg.2017$Date <- as.Date( paste("2017-", format(veg.df$Date, "%m-%d")))
veg.2021 <- veg.df[ sample(1:nrow(veg.df), nrow(veg.df), replace=TRUE),]
veg.2021$Date <- as.Date( paste("2021-", format(veg.df$Date, "%m-%d")))
veg.2024 <- original.veg.df[ sample(1:nrow(veg.df), nrow(veg.df), replace=TRUE),]
veg.2024$Date <- as.Date( paste("2024-", format(veg.df$Date, "%m-%d")))

veg.df <- rbind(veg.2013, veg.2017, veg.2021, veg.2024)
# combined species that were sampled twice
names(veg.df) <- make.names(names(veg.df))
veg.df <- plyr::ddply(veg.df, c("Study.Area.Name","Transect.Label","Date","Comments","Species"), plyr::summarize,
                      Foliar.cover....=sum(Foliar.cover...., na.rm=TRUE))
# fix up variable names in the data.frame.
# Variable names in R must start with a letter and contain letters or number or _. 
# Blanks in variable names are not normally allowed. Blanks will be replaced by . (period)
cat("\nOriginal variable names in data frame\n")
## 
## Original variable names in data frame
names(veg.df)
## [1] "Study.Area.Name"  "Transect.Label"   "Date"            
## [4] "Comments"         "Species"          "Foliar.cover...."
names(veg.df) <- make.names(names(veg.df))

cat("\nCorrected variable names of data frame\n")
## 
## Corrected variable names of data frame
names(veg.df)
## [1] "Study.Area.Name"  "Transect.Label"   "Date"            
## [4] "Comments"         "Species"          "Foliar.cover...."
# Convert dates to R date format
xtabs(~Date, data=veg.df, exclude=NULL, na.action=na.pass)  # check the date formats. Make sure that all yyyy-mm-dd
## Date
## 2013-09-09 2013-09-10 2017-09-09 2017-09-10 2021-09-09 2021-09-10 
##        137        146        134        145        132        143 
## 2024-09-09 2024-09-10 
##        141        146
veg.df$Date <- as.Date(veg.df$Date, "%d-%b-%y", tz="UTC")
veg.df$Year <- as.numeric(format(veg.df$Date, "%Y"))



# Check that the Study Area Name is the same across all years
# Look at the output from the xtabs() to see if there are multiple spellings 
# of the same Study.Area.Name.

# We will convert the Study.Area.Name to Proper Case.
veg.df$Study.Area.Name <- stringr::str_to_title(veg.df$Study.Area.Name)
xtabs(~Study.Area.Name,      data=veg.df, exclude=NULL, na.action=na.pass)
## Study.Area.Name
## Babine Mountain Park 
##                 1124
xtabs(~Study.Area.Name+Year, data=veg.df, exclude=NULL, na.action=na.pass)
##                       Year
## Study.Area.Name        2013 2017 2021 2024
##   Babine Mountain Park  283  279  275  287
# Check the Transect.Labels for typos
xtabs(~Study.Area.Name+Transect.Label, data=veg.df, exclude=NULL, na.action=na.pass)
##                       Transect.Label
## Study.Area.Name        Babine1 Babine2
##   Babine Mountain Park     556     568
# When is each transect measured?
xtabs(~Transect.Label+Year, data=veg.df, exclude=NULL, na.action=na.pass)
##               Year
## Transect.Label 2013 2017 2021 2024
##        Babine1  142  146  130  138
##        Babine2  141  133  145  149
xtabs(~Date+Transect.Label, data=veg.df, exclude=NULL, na.action=na.pass)
##             Transect.Label
## Date         Babine1 Babine2
##   2013-09-09      68      69
##   2013-09-10      74      72
##   2017-09-09      72      62
##   2017-09-10      74      71
##   2021-09-09      58      74
##   2021-09-10      72      71
##   2024-09-09      66      75
##   2024-09-10      72      74
# Check the plot codes
# In the Babine example, this is in the comment field. We need to extract
veg.df$Plot <- paste(veg.df$Comments, ';', sep="")
veg.df$Plot <- substr(veg.df$Plot, 1, -1+regexpr(";",veg.df$Plot, fixed=TRUE))
xtabs(~Transect.Label+Plot, data=veg.df, exclude=NULL, na.action=na.pass)
##               Plot
## Transect.Label Plot1 Plot10 Plot2 Plot3 Plot4 Plot5 Plot6 Plot7 Plot8
##        Babine1    74     64    55    57    26    90    42    72    29
##        Babine2    60     68    53    61    45    42    49    58    57
##               Plot
## Transect.Label Plot9
##        Babine1    47
##        Babine2    75
# Make the plot codes a combination of transect number and plot code
veg.df$Plot <- interaction(veg.df$Transect.Label, veg.df$Plot)
xtabs(~Transect.Label+Plot, data=veg.df, exclude=NULL, na.action=na.pass)
##               Plot
## Transect.Label Babine1.Plot1 Babine2.Plot1 Babine1.Plot10 Babine2.Plot10
##        Babine1            74             0             64              0
##        Babine2             0            60              0             68
##               Plot
## Transect.Label Babine1.Plot2 Babine2.Plot2 Babine1.Plot3 Babine2.Plot3
##        Babine1            55             0            57             0
##        Babine2             0            53             0            61
##               Plot
## Transect.Label Babine1.Plot4 Babine2.Plot4 Babine1.Plot5 Babine2.Plot5
##        Babine1            26             0            90             0
##        Babine2             0            45             0            42
##               Plot
## Transect.Label Babine1.Plot6 Babine2.Plot6 Babine1.Plot7 Babine2.Plot7
##        Babine1            42             0            72             0
##        Babine2             0            49             0            58
##               Plot
## Transect.Label Babine1.Plot8 Babine2.Plot8 Babine1.Plot9 Babine2.Plot9
##        Babine1            29             0            47             0
##        Babine2             0            57             0            75
# Check the Species code to make sure that they are all ok
xtabs(~Species, data=veg.df, exclude=NULL, na.action=na.pass)
## Species
##        ACONDEL        ANTEALP        ARNILAT        ARTENOR  BARBILOPHOZIA 
##             42              9             13             51             21 
##        BISTVIV  BRACHYTHECIUM        CAMPLAS       CARDBEL1        CAREALB 
##             10             16             25              3              2 
##       CAREPYR1          CAREX       CASSMER1        CETRERI        CETRISL 
##              2              2             18             10             30 
##        CLADMIT       CLADONIA        CLADPYX        CLADRAN        DACTARC 
##             25             45              1             30             16 
## DICOTYLEDONEAE       DICRANUM        EPILANG       ERIGPER1        FESTALT 
##              4             61              5             22             53 
##        FESTBRA        FESTVII        FLAVCUC       GENTIANA        HEUCGLA 
##              5              2             25              5              5 
##        HIERGRA        HYLOSPL        JUNCDRU        LOBALIN        LUETPEC 
##             19              2              6              2             17 
##         LUZULA        MICRFER           NULL        PEDICAP    PEDICULARIS 
##             23              6            100              6              2 
##       PEDILAG1      PELTIGERA        PHLEALP        PLEUSCH            POA 
##              6             20             20             31              4 
##       POA ALP2        POA ARC        POACEAE        POLYJUN        POTEDIV 
##              4              3              2             48              7 
##     POTENTILLA    RACOMITRIUM        RUMELAP        SALIARC        SALIRET 
##              9             10              5             10              8 
##        SEDUDIV        SELADEN        SENETRI        SIBBPRO        SILEACA 
##              4              7              5             42             26 
##        STELLON   STEREOCAULON        THAMVER        VACCCAE       VEROWOR1 
##             28             36             22             16             10
xtabs(~Species+Year, data=veg.df, exclude=NULL, na.action=na.pass)
##                 Year
## Species          2013 2017 2021 2024
##   ACONDEL          10    9   10   13
##   ANTEALP           1    1    5    2
##   ARNILAT           2    4    3    4
##   ARTENOR           9   17   13   12
##   BARBILOPHOZIA     3    7    6    5
##   BISTVIV           3    2    3    2
##   BRACHYTHECIUM     3    6    3    4
##   CAMPLAS           7    9    4    5
##   CARDBEL1          2    0    1    0
##   CAREALB           1    0    0    1
##   CAREPYR1          0    0    1    1
##   CAREX             1    0    0    1
##   CASSMER1          5    4    4    5
##   CETRERI           4    2    2    2
##   CETRISL           6    5   10    9
##   CLADMIT           7    5    8    5
##   CLADONIA         13   11    9   12
##   CLADPYX           0    1    0    0
##   CLADRAN           8    5   10    7
##   DACTARC           3    5    5    3
##   DICOTYLEDONEAE    0    1    2    1
##   DICRANUM         18   16   13   14
##   EPILANG           2    1    0    2
##   ERIGPER1          9    4    6    3
##   FESTALT          15   14   12   12
##   FESTBRA           1    1    2    1
##   FESTVII           0    0    1    1
##   FLAVCUC           6    4    9    6
##   GENTIANA          0    1    2    2
##   HEUCGLA           2    1    1    1
##   HIERGRA           6    3    5    5
##   HYLOSPL           0    1    0    1
##   JUNCDRU           1    2    2    1
##   LOBALIN           0    1    1    0
##   LUETPEC           8    2    5    2
##   LUZULA            7    7    6    3
##   MICRFER           1    2    2    1
##   NULL             23   22   26   29
##   PEDICAP           2    1    1    2
##   PEDICULARIS       1    0    1    0
##   PEDILAG1          2    2    1    1
##   PELTIGERA         4    5    3    8
##   PHLEALP           3    8    3    6
##   PLEUSCH          11    9    4    7
##   POA               1    2    0    1
##   POA ALP2          1    1    1    1
##   POA ARC           0    1    1    1
##   POACEAE           1    0    1    0
##   POLYJUN          14   11   13   10
##   POTEDIV           2    2    1    2
##   POTENTILLA        2    3    1    3
##   RACOMITRIUM       5    2    2    1
##   RUMELAP           2    1    0    2
##   SALIARC           1    2    3    4
##   SALIRET           1    3    1    3
##   SEDUDIV           1    2    0    1
##   SELADEN           2    1    2    2
##   SENETRI           1    2    0    2
##   SIBBPRO           9   13    8   12
##   SILEACA           5    8    7    6
##   STELLON           5    5   10    8
##   STEREOCAULON      6    8   12   10
##   THAMVER           7    5    2    8
##   VACCCAE           4    4    2    6
##   VEROWOR1          3    2    3    2
# Check the % cover to make sure that they are sensible
xtabs(~Foliar.cover...., data=veg.df, exclude=NULL, na.action=na.pass)
## Foliar.cover....
##   0 0.1 0.2 0.3 0.4 0.5 0.6   1 1.5   2   3   4   5   6   7   8   9  10 
## 100  87  86   7  14  80   1 172   3 135  49  66  54  35  17  31   7  28 
##  12  14  15  16  20  24  25  27  30  40  50  54  60  80  90 100 120 160 
##  26   5  26   6  16   5  14   1  14   9  10   2   6   3   3   2   2   1 
## 360 
##   1
# Exclude all species code = NULL which is usually rock and other junk
dim(veg.df)
## [1] 1124    8
veg.df <- veg.df[ !veg.df$Species %in% c("NULL"),]
dim(veg.df)
## [1] 1024    8
xtabs(~Species, data=veg.df, exclude=NULL, na.action=na.pass)
## Species
##        ACONDEL        ANTEALP        ARNILAT        ARTENOR  BARBILOPHOZIA 
##             42              9             13             51             21 
##        BISTVIV  BRACHYTHECIUM        CAMPLAS       CARDBEL1        CAREALB 
##             10             16             25              3              2 
##       CAREPYR1          CAREX       CASSMER1        CETRERI        CETRISL 
##              2              2             18             10             30 
##        CLADMIT       CLADONIA        CLADPYX        CLADRAN        DACTARC 
##             25             45              1             30             16 
## DICOTYLEDONEAE       DICRANUM        EPILANG       ERIGPER1        FESTALT 
##              4             61              5             22             53 
##        FESTBRA        FESTVII        FLAVCUC       GENTIANA        HEUCGLA 
##              5              2             25              5              5 
##        HIERGRA        HYLOSPL        JUNCDRU        LOBALIN        LUETPEC 
##             19              2              6              2             17 
##         LUZULA        MICRFER        PEDICAP    PEDICULARIS       PEDILAG1 
##             23              6              6              2              6 
##      PELTIGERA        PHLEALP        PLEUSCH            POA       POA ALP2 
##             20             20             31              4              4 
##        POA ARC        POACEAE        POLYJUN        POTEDIV     POTENTILLA 
##              3              2             48              7              9 
##    RACOMITRIUM        RUMELAP        SALIARC        SALIRET        SEDUDIV 
##             10              5             10              8              4 
##        SELADEN        SENETRI        SIBBPRO        SILEACA        STELLON 
##              7              5             42             26             28 
##   STEREOCAULON        THAMVER        VACCCAE       VEROWOR1 
##             36             22             16             10
# prefix for plot and other files created
file.prefix <- make.names(veg.df$Study.Area.Name[1])
file.prefix <- gsub(".", '-', file.prefix, fixed=TRUE) # convert spaces to -
file.prefix <- file.path("Plots",file.prefix)
#  Analysis of mean total cover

# Compute the total cover for each plot in each transect
cover <- plyr::ddply(veg.df, c("Study.Area.Name","Year","Transect.Label","Plot"), plyr::summarize,
                          cover=sum(Foliar.cover....))

# Compute the average total cover for each transect so I can plot these over time
cover.transect <- plyr::ddply(cover, c("Study.Area.Name","Year","Transect.Label"), plyr::summarize,
                          cover=mean(cover, na.rm=TRUE))
cover.transect
##        Study.Area.Name Year Transect.Label  cover
## 1 Babine Mountain Park 2013        Babine1 104.27
## 2 Babine Mountain Park 2013        Babine2  79.17
## 3 Babine Mountain Park 2017        Babine1  98.34
## 4 Babine Mountain Park 2017        Babine2  68.76
## 5 Babine Mountain Park 2021        Babine1  96.07
## 6 Babine Mountain Park 2021        Babine2  92.95
## 7 Babine Mountain Park 2024        Babine1  78.47
## 8 Babine Mountain Park 2024        Babine2  84.44
# Make a preliminary plot of cover by years

prelim.cover.plot <- ggplot(data=cover, aes(x=Year, y=cover, color=Transect.Label, linetype=Transect.Label))+
   ggtitle("Mean total cover")+
   ylab("Total % cover on the plots")+
   geom_point(position=position_dodge(width=.2))+
   geom_line(data=cover.transect)+
   scale_x_continuous(breaks=min(cover$Year, na.rm=TRUE):max(cover$Year, na.rm=TRUE))+
   facet_wrap(~Study.Area.Name, ncol=1)
prelim.cover.plot 

ggsave(plot=prelim.cover.plot, 
       file=paste(file.prefix,'-cover-plot-prelim.png',sep=""),
       h=6, w=6, units="in",dpi=300)


# This is a regression analysis with Year as the trend variable and Transect as a random effects.
# We need to account for the same transect (and plots) being measured over time.
# Because this is a linear mixed model and the because the total cover is typically large enough, no transformation
# is needed.

# define the YearF effect for process error (year specific effcts)
cover.transect$YearF           <- factor(cover.transect$Year)
cover.transect$Transect.LabelF <- factor(cover.transect$Transect.Label)
cover.transect
##        Study.Area.Name Year Transect.Label  cover YearF Transect.LabelF
## 1 Babine Mountain Park 2013        Babine1 104.27  2013         Babine1
## 2 Babine Mountain Park 2013        Babine2  79.17  2013         Babine2
## 3 Babine Mountain Park 2017        Babine1  98.34  2017         Babine1
## 4 Babine Mountain Park 2017        Babine2  68.76  2017         Babine2
## 5 Babine Mountain Park 2021        Babine1  96.07  2021         Babine1
## 6 Babine Mountain Park 2021        Babine2  92.95  2021         Babine2
## 7 Babine Mountain Park 2024        Babine1  78.47  2024         Babine1
## 8 Babine Mountain Park 2024        Babine2  84.44  2024         Babine2
cover.fit.lmer <-  lmerTest::lmer(cover ~ Year + (1|Transect.LabelF) + (1|YearF), data=cover.transect)
anova(cover.fit.lmer,dfm='Kenward-Roger')
## Analysis of Variance Table of type III  with  Satterthwaite 
## approximation for degrees of freedom
##      Sum Sq Mean Sq NumDF  DenDF F.value Pr(>F)
## Year  32.29   32.29     1 5.0018  0.2505  0.638
summary(cover.fit.lmer)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: cover ~ Year + (1 | Transect.LabelF) + (1 | YearF)
##    Data: cover.transect
## 
## REML criterion at convergence: 54.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4009 -0.7286  0.3755  0.5892  0.9004 
## 
## Random effects:
##  Groups          Name        Variance  Std.Dev. 
##  YearF           (Intercept) 4.691e-09 6.849e-05
##  Transect.LabelF (Intercept) 5.172e+01 7.192e+00
##  Residual                    1.289e+02 1.135e+01
## Number of obs: 8, groups:  YearF, 4; Transect.LabelF, 2
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)
## (Intercept) 1066.0950  1954.6412    5.0020   0.545    0.609
## Year          -0.4846     0.9682    5.0020  -0.500    0.638
## 
## Correlation of Fixed Effects:
##      (Intr)
## Year -1.000
VarCorr(cover.fit.lmer)
##  Groups          Name        Std.Dev.  
##  YearF           (Intercept) 6.8494e-05
##  Transect.LabelF (Intercept) 7.1918e+00
##  Residual                    1.1354e+01
# Look at the residual plot 
diag.plot <- sf.autoplot.lmer(cover.fit.lmer)  # residual and other diagnostic plots
## Loading required package: grid
## Loading required package: gridExtra
## `geom_smooth()` using method = 'loess'
plot(diag.plot)

ggplot2::ggsave(plot=diag.plot, 
                file=paste(file.prefix,"-cover-residual-lmer-plot.png",sep=""),
                h=6, w=6, units="in", dpi=300)

# check for autocorrelation - look at the average residual over time
cover.transect$resid <- cover.transect$cover - predict(cover.fit.lmer, newdata=cover.transect, re.form=~0)
mean.resid <- plyr::ddply(cover.transect, "Year", summarize, mean.resid=mean(resid))
resid.fit <- lm( mean.resid ~ 1, data=mean.resid)
dwres1 <- car::durbinWatsonTest(resid.fit)
dwres1
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.7333489      3.312864      NA
##  Alternative hypothesis: rho != 0
dwres2 <- lmtest::dwtest(resid.fit)
dwres2
## 
##  Durbin-Watson test
## 
## data:  resid.fit
## DW = 3.3129, p-value = 0.9743
## alternative hypothesis: true autocorrelation is greater than 0
# extract the slope
cover.slopes <- data.frame(
       Study.Area.Name =cover.transect$Study.Area.Name[1],
       slope           = fixef(cover.fit.lmer)["Year"],
       slope.se        = summary(cover.fit.lmer)$coefficients["Year","Pr(>|t|)"],
       p.value         = summary(cover.fit.lmer)$coefficients[row.names(summary(cover.fit.lmer)$coefficients)=="Year"  ,"Pr(>|t|)"], 
       #r2             = summary(cover.fit.lmer)$r.squared,  # not defined for mixed effect models
       stringsAsFactors=FALSE)
cover.slopes
##           Study.Area.Name   slope  slope.se   p.value
## Year Babine Mountain Park -0.4846 0.6379659 0.6379659
# compute the fitted values from the model
cover.fitted <- data.frame(
                 Study.Area.Name=cover.transect$Study.Area.Name[1],
                 Year=seq(min(cover$Year, na.rm=TRUE),max(cover$Year, na.rm=TRUE), .1),
                 stringsAsFactors=FALSE)
cover.fitted$pred.mean <- predict(cover.fit.lmer, newdata=cover.fitted,type="response", re.form=~0)
head(cover.fitted)
##        Study.Area.Name   Year pred.mean
## 1 Babine Mountain Park 2013.0  90.59520
## 2 Babine Mountain Park 2013.1  90.54674
## 3 Babine Mountain Park 2013.2  90.49828
## 4 Babine Mountain Park 2013.3  90.44982
## 5 Babine Mountain Park 2013.4  90.40136
## 6 Babine Mountain Park 2013.5  90.35290
# Plot with trend line 
cover.plot.summary <- ggplot2::ggplot(data=cover,
                                    aes(x=Year, y=cover))+
   ggtitle("Total Species Cover")+
   ylab("Total % cover")+
   geom_point(size=3, aes(color=Transect.Label),position=position_dodge(w=0.2))+
   geom_line(data=cover.fitted, aes(x=Year,y=pred.mean))+
   facet_wrap(~Study.Area.Name, ncol=1, scales="free" )+
   scale_x_continuous(breaks=min(cover$Year, na.rm=TRUE):max(cover$Year,na.rm=TRUE))+
   geom_text(data=cover.slopes, aes(x=min(cover$Year, na.rm=TRUE), y=max(cover$cover, na.rm=TRUE)), 
             label=paste("Slope : ",round(cover.slopes$slope,2), 
                         " ( SE "  ,round(cover.slopes$slope.se,2),")",
                         " p :"    ,round(cover.slopes$p.value,3)),
                         hjust="left")
cover.plot.summary

ggsave(plot=cover.plot.summary, 
       file=paste(file.prefix,'-cover-plot-summary-lmer.png',sep=""),
       h=6, w=6, units="in", dpi=300)



##### if lmer() does not converge, try an approximate analysis on the overall averages 


# Compute the average total cover for each transect so I can plot these over time
cover.avg <- plyr::ddply(cover.transect, c("Study.Area.Name","Year"), plyr::summarize,
                          cover=mean(cover, na.rm=TRUE))
cover.avg
##        Study.Area.Name Year  cover
## 1 Babine Mountain Park 2013 91.720
## 2 Babine Mountain Park 2017 83.550
## 3 Babine Mountain Park 2021 94.510
## 4 Babine Mountain Park 2024 81.455
# Make a preliminary plot of average cover by years

prelim.cover.plot.avg <- ggplot(data=cover.avg, aes(x=Year, y=cover))+
   ggtitle("Mean total cover - averaged over all transects in a year")+
   ylab("Mean Total % cover on the plots")+
   geom_point(position=position_dodge(width=.2))+
   geom_smooth(method="lm", se=FALSE)+
   scale_x_continuous(breaks=min(cover$Year, na.rm=TRUE):max(cover$Year, na.rm=TRUE))+
   facet_wrap(~Study.Area.Name, ncol=1)
prelim.cover.plot.avg 

ggsave(plot=prelim.cover.plot.avg, 
       file=paste(file.prefix,'-cover-plot-prelim-avg.png',sep=""),
       h=6, w=6, units="in",dpi=300)


# This is a simple regression analysis with Year as the trend variable 

cover.fit.avg <-  lm(cover ~ Year, data=cover.avg)
anova(cover.fit.avg)
## Analysis of Variance Table
## 
## Response: cover
##           Df  Sum Sq Mean Sq F value Pr(>F)
## Year       1  16.145  16.145  0.3148 0.6312
## Residuals  2 102.567  51.283
summary(cover.fit.avg)
## 
## Call:
## lm(formula = cover ~ Year, data = cover.avg)
## 
## Residuals:
##      1      2      3      4 
##  1.125 -5.107  7.792 -3.810 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1066.0950  1743.5533   0.611    0.603
## Year          -0.4846     0.8637  -0.561    0.631
## 
## Residual standard error: 7.161 on 2 degrees of freedom
## Multiple R-squared:  0.136,  Adjusted R-squared:  -0.296 
## F-statistic: 0.3148 on 1 and 2 DF,  p-value: 0.6312
# Look at the residual plot 
diag.plot <- autoplot(cover.fit.avg)  # residual and other diagnostic plots
show(diag.plot)

ggplot2::ggsave(plot=diag.plot, 
                file=paste(file.prefix,"-cover-residual-avg-plot.png",sep=""),
                h=6, w=6, units="in", dpi=300)

# check for autocorrelation - look at the average residual over time
cover.avg$resid <- cover.avg$cover - predict(cover.fit.avg, newdata=cover.avg)
mean.resid <- plyr::ddply(cover.avg, "Year", summarize, mean.resid=mean(resid))
resid.fit <- lm( mean.resid ~ 1, data=mean.resid)
dwres1 <- car::durbinWatsonTest(resid.fit)
dwres1
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.7333489      3.312864      NA
##  Alternative hypothesis: rho != 0
dwres2 <- lmtest::dwtest(resid.fit)
dwres2
## 
##  Durbin-Watson test
## 
## data:  resid.fit
## DW = 3.3129, p-value = 0.9743
## alternative hypothesis: true autocorrelation is greater than 0
# extract the slope
cover.slopes.avg <- data.frame(
       Study.Area.Name =cover.transect$Study.Area.Name[1],
       slope           = coef(cover.fit.avg)["Year"],
       slope.se        = summary(cover.fit.avg)$coefficients["Year","Pr(>|t|)"],
       p.value         = summary(cover.fit.avg)$coefficients[row.names(summary(cover.fit.avg)$coefficients)=="Year"  ,"Pr(>|t|)"], 
       r2              = summary(cover.fit.avg)$r.squared, 
       stringsAsFactors=FALSE)
cover.slopes.avg
##           Study.Area.Name   slope  slope.se   p.value        r2
## Year Babine Mountain Park -0.4846 0.6312152 0.6312152 0.1360022
# compute the fitted values from the model
cover.fitted.avg <- data.frame(
                 Study.Area.Name=cover.transect$Study.Area.Name[1],
                 Year=seq(min(cover$Year, na.rm=TRUE),max(cover$Year, na.rm=TRUE), .1),
                 stringsAsFactors=FALSE)
cover.fitted.avg$pred.mean <- predict(cover.fit.avg, newdata=cover.fitted,type="response")
head(cover.fitted.avg)
##        Study.Area.Name   Year pred.mean
## 1 Babine Mountain Park 2013.0  90.59520
## 2 Babine Mountain Park 2013.1  90.54674
## 3 Babine Mountain Park 2013.2  90.49828
## 4 Babine Mountain Park 2013.3  90.44982
## 5 Babine Mountain Park 2013.4  90.40136
## 6 Babine Mountain Park 2013.5  90.35290
# Plot with trend line 
cover.plot.summary.avg <- ggplot2::ggplot(data=cover.avg,
                                    aes(x=Year, y=cover))+
   ggtitle("Total Species Cover")+
   ylab("Mean Total % cover")+
   geom_point(size=3,position=position_dodge(w=0.2))+
   geom_line(data=cover.fitted.avg, aes(x=Year,y=pred.mean))+
   facet_wrap(~Study.Area.Name, ncol=1, scales="free" )+
   scale_x_continuous(breaks=min(cover$Year, na.rm=TRUE):max(cover$Year,na.rm=TRUE))+
   geom_text(data=cover.slopes.avg, aes(x=min(cover$Year, na.rm=TRUE), y=max(cover.avg$cover, na.rm=TRUE)), 
             label=paste("Slope : ",round(cover.slopes.avg$slope,2), 
                         " ( SE "  ,round(cover.slopes.avg$slope.se,2),")",
                         " p :"    ,round(cover.slopes.avg$p.value,3)),
                         hjust="left")
cover.plot.summary.avg

ggsave(plot=cover.plot.summary.avg, 
       file=paste(file.prefix,'-cover-plot-summary-avg.png',sep=""),
       h=6, w=6, units="in", dpi=300)
#  Analysis of mean plot-level species richness
#  Only want those species that have cover > 0. Species with cover = 0 are not present.

# Compute the species richness for each plot in each transect
richness <- plyr::ddply(veg.df, c("Study.Area.Name","Year","Transect.Label","Plot"), plyr::summarize,
                          richness=length(Species[ Foliar.cover.... > 0]))

# Compute the average richness for each transect so I can plot these over time
richness.transect <- plyr::ddply(richness, c("Study.Area.Name","Year","Transect.Label"), plyr::summarize,
                          richness=mean(richness, na.rm=TRUE))

# Make a preliminary plot of richness by years

prelim.richness.plot <- ggplot(data=richness, aes(x=Year, y=richness, color=Transect.Label, linetype=Transect.Label))+
   ggtitle("Species richness")+
   ylab("Species richness")+
   geom_point(position=position_dodge(width=.2))+
   geom_line(data=richness.transect)+
   scale_x_continuous(breaks=min(richness$Year, na.rm=TRUE):max(richness$Year, na.rm=TRUE))+
   facet_wrap(~Study.Area.Name, ncol=1)
prelim.richness.plot 

ggsave(plot=prelim.richness.plot, 
       file=paste(file.prefix,'-richness-plot-prelim.png',sep=""),
       h=6, w=6, units="in",dpi=300)


# This is a regression analysis with Year as the trend variable and Transect as a random effects.
# We need to account for the same transect (and plots) being measured over time.
# Because this is a linear mixed model and the because the total richness is typically large enough, no transformation
# is needed.

# define the YearF effect for process error (year specific effcts)
richness.transect$YearF           <- factor(richness.transect$Year)
richness.transect$Transect.LabelF <- factor(richness.transect$Transect.Label)
richness.transect
##        Study.Area.Name Year Transect.Label richness YearF Transect.LabelF
## 1 Babine Mountain Park 2013        Babine1     13.8  2013         Babine1
## 2 Babine Mountain Park 2013        Babine2     12.2  2013         Babine2
## 3 Babine Mountain Park 2017        Babine1     14.1  2017         Babine1
## 4 Babine Mountain Park 2017        Babine2     11.6  2017         Babine2
## 5 Babine Mountain Park 2021        Babine1     12.3  2021         Babine1
## 6 Babine Mountain Park 2021        Babine2     12.6  2021         Babine2
## 7 Babine Mountain Park 2024        Babine1     13.4  2024         Babine1
## 8 Babine Mountain Park 2024        Babine2     12.4  2024         Babine2
richness.fit.lmer <-  lmerTest::lmer(richness ~ Year + (1|Transect.LabelF) + (1|YearF), data=richness.transect)
anova(richness.fit.lmer,dfm='Kenward-Roger')
## Analysis of Variance Table of type III  with  Satterthwaite 
## approximation for degrees of freedom
##        Sum Sq  Mean Sq NumDF DenDF F.value Pr(>F)
## Year 0.065455 0.065455     1     5   0.139 0.7246
summary(richness.fit.lmer)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: richness ~ Year + (1 | Transect.LabelF) + (1 | YearF)
##    Data: richness.transect
## 
## REML criterion at convergence: 21.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3885 -0.5126  0.3126  0.5194  1.1074 
## 
## Random effects:
##  Groups          Name        Variance Std.Dev.
##  YearF           (Intercept) 0.0000   0.0000  
##  Transect.LabelF (Intercept) 0.6023   0.7761  
##  Residual                    0.4709   0.6862  
## Number of obs: 8, groups:  YearF, 4; Transect.LabelF, 2
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)  56.84545  118.14221   5.00000   0.481    0.651
## Year         -0.02182    0.05852   5.00000  -0.373    0.725
## 
## Correlation of Fixed Effects:
##      (Intr)
## Year -1.000
VarCorr(richness.fit.lmer)
##  Groups          Name        Std.Dev.
##  YearF           (Intercept) 0.00000 
##  Transect.LabelF (Intercept) 0.77606 
##  Residual                    0.68623
# Look at the residual plot 
diag.plot <- sf.autoplot.lmer(richness.fit.lmer)  # residual and other diagnostic plots
## `geom_smooth()` using method = 'loess'
plot(diag.plot)

ggplot2::ggsave(plot=diag.plot, 
                file=paste(file.prefix,"-richness-residual-lmer-plot.png",sep=""),
                h=6, w=6, units="in", dpi=300)

# check for autocorrelation - look at the average residual over time
richness.transect$resid <- richness.transect$richness - predict(richness.fit.lmer, newdata=richness.transect, re.form=~0)
mean.resid <- plyr::ddply(richness.transect, "Year", summarize, mean.resid=mean(resid))
resid.fit <- lm( mean.resid ~ 1, data=mean.resid)
dwres1 <- car::durbinWatsonTest(resid.fit)
dwres1
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.4725704       2.58255      NA
##  Alternative hypothesis: rho != 0
dwres2 <- lmtest::dwtest(resid.fit)
dwres2
## 
##  Durbin-Watson test
## 
## data:  resid.fit
## DW = 2.5826, p-value = 0.7612
## alternative hypothesis: true autocorrelation is greater than 0
# extract the slope
richness.slopes <- data.frame(
       Study.Area.Name =richness.transect$Study.Area.Name[1],
       slope           = fixef(richness.fit.lmer)["Year"],
       slope.se        = summary(richness.fit.lmer)$coefficients["Year","Pr(>|t|)"],
       p.value         = summary(richness.fit.lmer)$coefficients[row.names(summary(richness.fit.lmer)$coefficients)=="Year"  ,"Pr(>|t|)"], 
       #r2             = summary(richness.fit.lmer)$r.squared,  # not defined for mixed effect models
       stringsAsFactors=FALSE)
richness.slopes
##           Study.Area.Name       slope slope.se  p.value
## Year Babine Mountain Park -0.02181818 0.724563 0.724563
# compute the fitted values from the model
richness.fitted <- data.frame(
                 Study.Area.Name=richness.transect$Study.Area.Name[1],
                 Year=seq(min(richness$Year, na.rm=TRUE),max(richness$Year, na.rm=TRUE), .1),
                 stringsAsFactors=FALSE)
richness.fitted$pred.mean <- predict(richness.fit.lmer, newdata=richness.fitted,type="response", re.form=~0)
head(richness.fitted)
##        Study.Area.Name   Year pred.mean
## 1 Babine Mountain Park 2013.0  12.92545
## 2 Babine Mountain Park 2013.1  12.92327
## 3 Babine Mountain Park 2013.2  12.92109
## 4 Babine Mountain Park 2013.3  12.91891
## 5 Babine Mountain Park 2013.4  12.91673
## 6 Babine Mountain Park 2013.5  12.91455
# Plot with trend line 
richness.plot.summary <- ggplot2::ggplot(data=richness,
                                    aes(x=Year, y=richness))+
   ggtitle("Total Species richness")+
   ylab("Total % richness")+
   geom_point(size=3, aes(color=Transect.Label),position=position_dodge(w=0.2))+
   geom_line(data=richness.fitted, aes(x=Year,y=pred.mean))+
   facet_wrap(~Study.Area.Name, ncol=1, scales="free" )+
   scale_x_continuous(breaks=min(richness$Year, na.rm=TRUE):max(richness$Year,na.rm=TRUE))+
   geom_text(data=richness.slopes, aes(x=min(richness$Year, na.rm=TRUE), y=max(richness$richness, na.rm=TRUE)), 
             label=paste("Slope : ",round(richness.slopes$slope,2), 
                         " ( SE "  ,round(richness.slopes$slope.se,2),")",
                         " p :"    ,round(richness.slopes$p.value,3)),
                         hjust="left")
richness.plot.summary

ggsave(plot=richness.plot.summary, 
       file=paste(file.prefix,'-richness-plot-summary-lmer.png',sep=""),
       h=6, w=6, units="in", dpi=300)



#####  if lmer() does not converge, try an approximate analysis on the overall averages 


# Compute the average total richness for each transect so I can plot these over time
richness.avg <- plyr::ddply(richness.transect, c("Study.Area.Name","Year"), plyr::summarize,
                          richness=mean(richness, na.rm=TRUE))
richness.avg
##        Study.Area.Name Year richness
## 1 Babine Mountain Park 2013    13.00
## 2 Babine Mountain Park 2017    12.85
## 3 Babine Mountain Park 2021    12.45
## 4 Babine Mountain Park 2024    12.90
# Make a preliminary plot of average richness by years

prelim.richness.plot.avg <- ggplot(data=richness.avg, aes(x=Year, y=richness))+
   ggtitle("Mean total richness - averaged over all transects in a year")+
   ylab("Mean Total % richness on the plots")+
   geom_point(position=position_dodge(width=.2))+
   geom_smooth(method="lm", se=FALSE)+
   scale_x_continuous(breaks=min(richness$Year, na.rm=TRUE):max(richness$Year, na.rm=TRUE))+
   facet_wrap(~Study.Area.Name, ncol=1)
prelim.richness.plot.avg 

ggsave(plot=prelim.richness.plot, 
       file=paste(file.prefix,'-richness-plot-prelim-avg.png', sep=""),
       h=6, w=6, units="in",dpi=300)


# This is a simple regression analysis with Year as the trend variable 

richness.fit.avg <-  lm(richness ~ Year, data=richness.avg)
anova(richness.fit.avg)
## Analysis of Variance Table
## 
## Response: richness
##           Df   Sum Sq  Mean Sq F value Pr(>F)
## Year       1 0.032727 0.032727  0.4601 0.5676
## Residuals  2 0.142273 0.071136
summary(richness.fit.avg)
## 
## Call:
## lm(formula = richness ~ Year, data = richness.avg)
## 
## Residuals:
##        1        2        3        4 
##  0.07455  0.01182 -0.30091  0.21455 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.84545   64.93711   0.875    0.474
## Year        -0.02182    0.03217  -0.678    0.568
## 
## Residual standard error: 0.2667 on 2 degrees of freedom
## Multiple R-squared:  0.187,  Adjusted R-squared:  -0.2195 
## F-statistic: 0.4601 on 1 and 2 DF,  p-value: 0.5676
# Look at the residual plot 
diag.plot <- autoplot(richness.fit.avg)  # residual and other diagnostic plots
show(diag.plot)

ggplot2::ggsave(plot=diag.plot, 
                file=paste(file.prefix,"-richness-residual-avg-plot.png", sep=""),
                h=6, w=6, units="in", dpi=300)

# check for autocorrelation - look at the average residual over time
richness.avg$resid <- richness.avg$richness - predict(richness.fit.avg, newdata=richness.avg)
mean.resid <- plyr::ddply(richness.avg, "Year", summarize, mean.resid=mean(resid))
resid.fit <- lm( mean.resid ~ 1, data=mean.resid)
dwres1 <- car::durbinWatsonTest(resid.fit)
dwres1
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.4725704       2.58255      NA
##  Alternative hypothesis: rho != 0
dwres2 <- lmtest::dwtest(resid.fit)
dwres2
## 
##  Durbin-Watson test
## 
## data:  resid.fit
## DW = 2.5826, p-value = 0.7612
## alternative hypothesis: true autocorrelation is greater than 0
# extract the slope
richness.slopes.avg <- data.frame(
       Study.Area.Name =richness.transect$Study.Area.Name[1],
       slope           = coef(richness.fit.avg)["Year"],
       slope.se        = summary(richness.fit.avg)$coefficients["Year","Pr(>|t|)"],
       p.value         = summary(richness.fit.avg)$coefficients[row.names(summary(richness.fit.avg)$coefficients)=="Year"  ,"Pr(>|t|)"], 
       r2              = summary(richness.fit.avg)$r.squared, 
       stringsAsFactors=FALSE)
richness.slopes.avg
##           Study.Area.Name       slope slope.se p.value       r2
## Year Babine Mountain Park -0.02181818  0.56755 0.56755 0.187013
# compute the fitted values from the model
richness.fitted.avg <- data.frame(
                 Study.Area.Name=richness.transect$Study.Area.Name[1],
                 Year=seq(min(richness$Year, na.rm=TRUE),max(richness$Year, na.rm=TRUE), .1),
                 stringsAsFactors=FALSE)
richness.fitted.avg$pred.mean <- predict(richness.fit.avg, newdata=richness.fitted,type="response")
head(richness.fitted.avg)
##        Study.Area.Name   Year pred.mean
## 1 Babine Mountain Park 2013.0  12.92545
## 2 Babine Mountain Park 2013.1  12.92327
## 3 Babine Mountain Park 2013.2  12.92109
## 4 Babine Mountain Park 2013.3  12.91891
## 5 Babine Mountain Park 2013.4  12.91673
## 6 Babine Mountain Park 2013.5  12.91455
# Plot with trend line 
richness.plot.summary.avg <- ggplot2::ggplot(data=richness.avg,
                                    aes(x=Year, y=richness))+
   ggtitle("Total Species richness")+
   ylab("Mean Total % richness")+
   geom_point(size=3,position=position_dodge(w=0.2))+
   geom_line(data=richness.fitted.avg, aes(x=Year,y=pred.mean))+
   facet_wrap(~Study.Area.Name, ncol=1, scales="free" )+
   scale_x_continuous(breaks=min(richness$Year, na.rm=TRUE):max(richness$Year,na.rm=TRUE))+
   geom_text(data=richness.slopes.avg, aes(x=min(richness$Year, na.rm=TRUE), y=max(richness.avg$richness, na.rm=TRUE)), 
             label=paste("Slope : ",round(richness.slopes.avg$slope,2), 
                         " ( SE "  ,round(richness.slopes.avg$slope.se,2),")",
                         " p :"    ,round(richness.slopes.avg$p.value,3)),
                         hjust="left")
richness.plot.summary.avg

ggsave(plot=richness.plot.summary.avg, 
       file=paste(file.prefix,'-richness-plot-summary-avg.png',sep=""),
       h=6, w=6, units="in", dpi=300)
# 
#  Construct diversity profiles over time 

# Functions to compute the diversity profile
###########################################
#  Measuring species diversity using the entropy measures of Leinster and Cobbold (2012)
#  Code taken from http://jonlefcheck.net/2012/10/23/diversity-as-effective-numbers/


div.profile <- function(community, Z=diag(length(community))){
  # Compute profile scores for a vector of community abundances
  # See the Leinster and Cobbold (2012) paper
  
  # Input data
  #   community - vector of species counts in same order as Z matrix
  #   Z         - similarity matrix for each species vs other species
  #               default is a diagonal matrix in which every species is treated
  #               as functionally distinct
  #   Both community and Z must be ordered in the same way
  #   PerCover  - the percent cover (response variable) for which diversity matrix measured
    temp <- community >0
    community.red <- community[ temp]
    Z.red <- Z[temp,temp]
    p <- community.red/sum(community.red)
    ddply(data.frame(q=c(seq(0,.1,.01),seq(.2,5,.1))), "q", function(q, p){
      if(abs(q-1)>.05) { diversity <- sum(p*(Z.red%*%p)^(q$q-1))^(1/(1-q$q))}
      if(abs(q-1)<=.05){ diversity <- exp(-sum(p*log(Z.red%*%p), na.rm=TRUE))}
      names(diversity) <- 'diversity'
      diversity
    }, p=p)  
  }
  
# We construct the diversity profile for each transect in each year by summing
# the % cover over all the plots in a transect year
cover.transect <- plyr::ddply(veg.df, c("Study.Area.Name","Year","Transect.Label","Species"),
                              plyr::summarize,  cover=sum(Foliar.cover...., na.rm=TRUE))
head(cover.transect)
##        Study.Area.Name Year Transect.Label       Species cover
## 1 Babine Mountain Park 2013        Babine1       ACONDEL   9.0
## 2 Babine Mountain Park 2013        Babine1       ANTEALP   2.0
## 3 Babine Mountain Park 2013        Babine1       ARNILAT   4.1
## 4 Babine Mountain Park 2013        Babine1       ARTENOR  11.0
## 5 Babine Mountain Park 2013        Babine1 BARBILOPHOZIA   2.7
## 6 Babine Mountain Park 2013        Babine1       BISTVIV   0.9
# We need to make each transect have cover for ALL possible species in the overall database

cover.transect <- plyr::ddply(cover.transect, c("Study.Area.Name"), function (x){
   # extract all possible combination of year and species and transect and expand each transect
   trans.sp.year <- expand.grid(Study.Area.Name=unique(x$Study.Area.Name),
                                Species        =unique(x$Species),
                                Year           =unique(x$Year),
                                Transect.Label =unique(x$Transect.Label), stringsAsFactors=FALSE)
   x <- merge(x, trans.sp.year, all=TRUE)
   x$cover[ is.na(x$cover)] <- 0
   x
})

# Sort the data by species
cover.transect <- cover.transect[ order(cover.transect$Study.Area.Name,cover.transect$Year,
                                        cover.transect$Transect.Label,
                                        cover.transect$Species),]

# We dont have information on the Z matrix (species similarity) so we will
# use the default which is diagonal.
profiles <- ddply(cover.transect, c("Study.Area.Name","Year","Transect.Label"), function(x){
     cat('Profile computatations for ', unlist( x[1,]), "\n")
     profile <- div.profile(x$cover)
     profile
})
## Profile computatations for  Babine Mountain Park 2013 Babine1 ACONDEL 9 
## Profile computatations for  Babine Mountain Park 2013 Babine2 ACONDEL 1.6 
## Profile computatations for  Babine Mountain Park 2017 Babine1 ACONDEL 9.4 
## Profile computatations for  Babine Mountain Park 2017 Babine2 ACONDEL 0.2 
## Profile computatations for  Babine Mountain Park 2021 Babine1 ACONDEL 7.7 
## Profile computatations for  Babine Mountain Park 2021 Babine2 ACONDEL 1.4 
## Profile computatations for  Babine Mountain Park 2024 Babine1 ACONDEL 4.8 
## Profile computatations for  Babine Mountain Park 2024 Babine2 ACONDEL 3.5
# Plot the profiles
plot.profile <- ggplot2::ggplot(data=profiles, aes(x=q, y=diversity, color=Transect.Label, linetype=factor(Year) ))+
  ggtitle("Diversity profiles")+
  ylab("Effective number of species")+
  xlab("q")+
  geom_line()+
  facet_wrap(~Study.Area.Name, ncol=1)+
  scale_linetype_discrete(name="Year")
plot.profile

ggsave(plot=plot.profile, 
       file=paste(file.prefix,'-diversity-profiles.png',sep=""),
       h=6, w=6, units="in", dpi=300)


# Extract the value of effective number of species for q=2 and do the analysis
diversity.transect <- profiles[ profiles$q==2.0,]
diversity.transect
##          Study.Area.Name Year Transect.Label q diversity
## 30  Babine Mountain Park 2013        Babine1 2  6.615506
## 90  Babine Mountain Park 2013        Babine2 2 10.185613
## 150 Babine Mountain Park 2017        Babine1 2  6.250151
## 210 Babine Mountain Park 2017        Babine2 2 11.854679
## 270 Babine Mountain Park 2021        Babine1 2  3.200158
## 330 Babine Mountain Park 2021        Babine2 2 11.043594
## 390 Babine Mountain Park 2024        Babine1 2  4.255410
## 450 Babine Mountain Park 2024        Babine2 2  9.957339
temp <- diversity.transect
temp$diversity <- round(temp$diversity, 1)
print(temp, row.names=FALSE)
##       Study.Area.Name Year Transect.Label q diversity
##  Babine Mountain Park 2013        Babine1 2       6.6
##  Babine Mountain Park 2013        Babine2 2      10.2
##  Babine Mountain Park 2017        Babine1 2       6.3
##  Babine Mountain Park 2017        Babine2 2      11.9
##  Babine Mountain Park 2021        Babine1 2       3.2
##  Babine Mountain Park 2021        Babine2 2      11.0
##  Babine Mountain Park 2024        Babine1 2       4.3
##  Babine Mountain Park 2024        Babine2 2      10.0
# Make a preliminary plot of diversity by years

prelim.diversity.plot <- ggplot(data=diversity.transect, aes(x=Year, y=diversity, color=Transect.Label, linetype=Transect.Label))+
   ggtitle(paste("Effective number of species at q =", format(diversity.transect$q[1],nsmall=2),sep=""))+
   ylab("Effective number of species")+
   geom_point(position=position_dodge(width=.2))+
   geom_line(data=diversity.transect)+
   scale_x_continuous(breaks=min(diversity.transect$Year, na.rm=TRUE):max(diversity.transect$Year, na.rm=TRUE))+
   facet_wrap(~Study.Area.Name, ncol=1)
prelim.diversity.plot 

ggsave(plot=prelim.diversity.plot, 
       file=paste(file.prefix,'-diversity-plot-prelim.png',sep=""),
       h=6, w=6, units="in",dpi=300)


# This is a regression analysis with Year as the trend variable and Transect as a random effects.
# We need to account for the same transect (and plots) being measured over time.
# Because this is a linear mixed model and the because the total diversity is typically large enough, no transformation
# is needed.

# define the YearF effect for process error (year specific effcts)
diversity.transect$YearF           <- factor(diversity.transect$Year)
diversity.transect$Transect.LabelF <- factor(diversity.transect$Transect.Label)
diversity.transect
##          Study.Area.Name Year Transect.Label q diversity YearF
## 30  Babine Mountain Park 2013        Babine1 2  6.615506  2013
## 90  Babine Mountain Park 2013        Babine2 2 10.185613  2013
## 150 Babine Mountain Park 2017        Babine1 2  6.250151  2017
## 210 Babine Mountain Park 2017        Babine2 2 11.854679  2017
## 270 Babine Mountain Park 2021        Babine1 2  3.200158  2021
## 330 Babine Mountain Park 2021        Babine2 2 11.043594  2021
## 390 Babine Mountain Park 2024        Babine1 2  4.255410  2024
## 450 Babine Mountain Park 2024        Babine2 2  9.957339  2024
##     Transect.LabelF
## 30          Babine1
## 90          Babine2
## 150         Babine1
## 210         Babine2
## 270         Babine1
## 330         Babine2
## 390         Babine1
## 450         Babine2
diversity.fit.lmer <-  lmerTest::lmer(diversity ~ Year + (1|Transect.LabelF) + (1|YearF), data=diversity.transect)
anova(diversity.fit.lmer,dfm='Kenward-Roger')
## Analysis of Variance Table of type III  with  Satterthwaite 
## approximation for degrees of freedom
##      Sum Sq Mean Sq NumDF  DenDF F.value Pr(>F)
## Year 3.4009  3.4009     1 5.0235  2.5033 0.1742
summary(diversity.fit.lmer)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: diversity ~ Year + (1 | Transect.LabelF) + (1 | YearF)
##    Data: diversity.transect
## 
## REML criterion at convergence: 29.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3608 -0.3424  0.2804  0.6275  0.7541 
## 
## Random effects:
##  Groups          Name        Variance Std.Dev.
##  YearF           (Intercept)  0.000   0.000   
##  Transect.LabelF (Intercept) 15.792   3.974   
##  Residual                     1.359   1.166   
## Number of obs: 8, groups:  YearF, 4; Transect.LabelF, 2
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)
## (Intercept) 325.4075   200.6854   5.0250   1.621    0.166
## Year         -0.1573     0.0994   5.0240  -1.582    0.174
## 
## Correlation of Fixed Effects:
##      (Intr)
## Year -1.000
VarCorr(diversity.fit.lmer)
##  Groups          Name        Std.Dev.
##  YearF           (Intercept) 0.0000  
##  Transect.LabelF (Intercept) 3.9739  
##  Residual                    1.1656
# Look at the residual plot 
diag.plot <- sf.autoplot.lmer(diversity.fit.lmer)  # residual and other diagnostic plots
## `geom_smooth()` using method = 'loess'
plot(diag.plot)

ggplot2::ggsave(plot=diag.plot, 
                file=paste(file.prefix,"-diversity-residual-lmer-plot.png",sep=""),
                h=6, w=6, units="in", dpi=300)

# check for autocorrelation - look at the average residual over time
diversity.transect$resid <- diversity.transect$diversity - predict(diversity.fit.lmer, newdata=diversity.transect, re.form=~0)
mean.resid <- plyr::ddply(diversity.transect, "Year", summarize, mean.resid=mean(resid))
resid.fit <- lm( mean.resid ~ 1, data=mean.resid)
dwres1 <- car::durbinWatsonTest(resid.fit)
dwres1
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.6741211      3.186394      NA
##  Alternative hypothesis: rho != 0
dwres2 <- lmtest::dwtest(resid.fit)
dwres2
## 
##  Durbin-Watson test
## 
## data:  resid.fit
## DW = 3.1864, p-value = 0.9412
## alternative hypothesis: true autocorrelation is greater than 0
# extract the slope
diversity.slopes <- data.frame(
       Study.Area.Name =diversity.transect$Study.Area.Name[1],
       slope           = fixef(diversity.fit.lmer)["Year"],
       slope.se        = summary(diversity.fit.lmer)$coefficients["Year","Pr(>|t|)"],
       p.value         = summary(diversity.fit.lmer)$coefficients[row.names(summary(diversity.fit.lmer)$coefficients)=="Year"  ,"Pr(>|t|)"], 
       #r2             = summary(diversity.fit.lmer)$r.squared,  # not defined for mixed effect models
       stringsAsFactors=FALSE)
diversity.slopes
##           Study.Area.Name      slope  slope.se   p.value
## Year Babine Mountain Park -0.1572692 0.1741838 0.1741838
# compute the fitted values from the model
diversity.fitted <- data.frame(
                 Study.Area.Name=diversity.transect$Study.Area.Name[1],
                 Year=seq(min(diversity.transect$Year, na.rm=TRUE),max(diversity.transect$Year, na.rm=TRUE), .1),
                 stringsAsFactors=FALSE)
diversity.fitted$pred.mean <- predict(diversity.fit.lmer, newdata=diversity.fitted,type="response", re.form=~0)
head(diversity.fitted)
##        Study.Area.Name   Year pred.mean
## 1 Babine Mountain Park 2013.0  8.824604
## 2 Babine Mountain Park 2013.1  8.808877
## 3 Babine Mountain Park 2013.2  8.793150
## 4 Babine Mountain Park 2013.3  8.777423
## 5 Babine Mountain Park 2013.4  8.761696
## 6 Babine Mountain Park 2013.5  8.745969
# Plot with trend line 
diversity.plot.summary <- ggplot2::ggplot(data=diversity.transect,
                                    aes(x=Year, y=diversity))+
   ggtitle(paste("Effective number of species at q =", format(diversity.transect$q[1],nsmall=2),sep=""))+
   ylab("Effective number of species")+
   geom_point(size=3, aes(color=Transect.Label),position=position_dodge(w=0.2))+
   geom_line(data=diversity.fitted, aes(x=Year,y=pred.mean))+
   facet_wrap(~Study.Area.Name, ncol=1, scales="free" )+
   scale_x_continuous(breaks=min(diversity.transect$Year, na.rm=TRUE):max(diversity.transect$Year,na.rm=TRUE))+
   geom_text(data=diversity.slopes, aes(x=min(diversity.transect$Year, na.rm=TRUE), y=max(diversity.transect$diversity, na.rm=TRUE)), 
             label=paste("Slope : ",round(diversity.slopes$slope,2), 
                         " ( SE "  ,round(diversity.slopes$slope.se,2),")",
                         " p :"    ,round(diversity.slopes$p.value,3)),
                         hjust="left")
diversity.plot.summary

ggsave(plot=diversity.plot.summary, 
       file=paste(file.prefix,'-diversity-plot-summary-lmer.png',sep=""),
       h=6, w=6, units="in", dpi=300)



##### if lmer() does not converge, try an approximate analysis on the overall averages 


# Compute the average total diversity for each transect so I can plot these over time
diversity.avg <- plyr::ddply(diversity.transect, c("Study.Area.Name","Year"), plyr::summarize,
                          diversity=mean(diversity, na.rm=TRUE))
diversity.avg
##        Study.Area.Name Year diversity
## 1 Babine Mountain Park 2013  8.400559
## 2 Babine Mountain Park 2017  9.052415
## 3 Babine Mountain Park 2021  7.121876
## 4 Babine Mountain Park 2024  7.106374
# Make a preliminary plot of average diversity by years

prelim.diversity.plot.avg <- ggplot(data=diversity.avg, aes(x=Year, y=diversity))+
   ggtitle("Mean effective number of species - averaged over all transects in a year")+
   ylab("Mean effective number of species")+
   geom_point(position=position_dodge(width=.2))+
   geom_smooth(method="lm", se=FALSE)+
   scale_x_continuous(breaks=min(diversity.transect$Year, na.rm=TRUE):max(diversity.transect$Year, na.rm=TRUE))+
   facet_wrap(~Study.Area.Name, ncol=1)
prelim.diversity.plot.avg 

ggsave(plot=prelim.diversity.plot, 
       file=paste(file.prefix,'-diversity-plot-prelim-avg.png',sep=""),
       h=6, w=6, units="in",dpi=300)


# This is a simple regression analysis with Year as the trend variable 

diversity.fit.avg <-  lm(diversity ~ Year, data=diversity.avg)
anova(diversity.fit.avg)
## Analysis of Variance Table
## 
## Response: diversity
##           Df Sum Sq Mean Sq F value Pr(>F)
## Year       1 1.7004 1.70043  3.0587 0.2224
## Residuals  2 1.1119 0.55593
summary(diversity.fit.avg)
## 
## Call:
## lm(formula = diversity ~ Year, data = diversity.avg)
## 
## Residuals:
##        1        2        3        4 
## -0.42404  0.85689 -0.44457  0.01173 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 325.40747  181.53326   1.793    0.215
## Year         -0.15727    0.08992  -1.749    0.222
## 
## Residual standard error: 0.7456 on 2 degrees of freedom
## Multiple R-squared:  0.6046, Adjusted R-squared:  0.407 
## F-statistic: 3.059 on 1 and 2 DF,  p-value: 0.2224
# Look at the residual plot 
diag.plot <- autoplot(diversity.fit.avg)  # residual and other diagnostic plots
show(diag.plot)

ggplot2::ggsave(plot=diag.plot, 
                file=paste(file.prefix,"-diversity-residual-avg-plot.png",sep=""),
                h=6, w=6, units="in", dpi=300)

# check for autocorrelation - look at the average residual over time
diversity.avg$resid <- diversity.avg$diversity - predict(diversity.fit.avg, newdata=diversity.avg)
mean.resid <- plyr::ddply(diversity.avg, "Year", summarize, mean.resid=mean(resid))
resid.fit <- lm( mean.resid ~ 1, data=mean.resid)
dwres1 <- car::durbinWatsonTest(resid.fit)
dwres1
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.6741211      3.186394      NA
##  Alternative hypothesis: rho != 0
dwres2 <- lmtest::dwtest(resid.fit)
dwres2
## 
##  Durbin-Watson test
## 
## data:  resid.fit
## DW = 3.1864, p-value = 0.9412
## alternative hypothesis: true autocorrelation is greater than 0
# extract the slope
diversity.slopes.avg <- data.frame(
       Study.Area.Name =diversity.transect$Study.Area.Name[1],
       slope           = coef(diversity.fit.avg)["Year"],
       slope.se        = summary(diversity.fit.avg)$coefficients["Year","Pr(>|t|)"],
       p.value         = summary(diversity.fit.avg)$coefficients[row.names(summary(diversity.fit.avg)$coefficients)=="Year"  ,"Pr(>|t|)"], 
       r2              = summary(diversity.fit.avg)$r.squared, 
       stringsAsFactors=FALSE)
diversity.slopes.avg
##           Study.Area.Name      slope  slope.se   p.value        r2
## Year Babine Mountain Park -0.1572692 0.2224113 0.2224113 0.6046442
# compute the fitted values from the model
diversity.fitted.avg <- data.frame(
                 Study.Area.Name=diversity.transect$Study.Area.Name[1],
                 Year=seq(min(diversity.transect$Year, na.rm=TRUE),max(diversity.transect$Year, na.rm=TRUE), .1),
                 stringsAsFactors=FALSE)
diversity.fitted.avg$pred.mean <- predict(diversity.fit.avg, newdata=diversity.fitted,type="response")
head(diversity.fitted.avg)
##        Study.Area.Name   Year pred.mean
## 1 Babine Mountain Park 2013.0  8.824604
## 2 Babine Mountain Park 2013.1  8.808877
## 3 Babine Mountain Park 2013.2  8.793150
## 4 Babine Mountain Park 2013.3  8.777423
## 5 Babine Mountain Park 2013.4  8.761696
## 6 Babine Mountain Park 2013.5  8.745969
# Plot with trend line 
diversity.plot.summary.avg <- ggplot2::ggplot(data=diversity.avg,
                                    aes(x=Year, y=diversity))+
   ggtitle(paste("Effective number of species at q =", format(diversity.transect$q[1],nsmall=2),sep=""))+
   ylab("Mean effective number of species")+
   geom_point(size=3,position=position_dodge(w=0.2))+
   geom_line(data=diversity.fitted.avg, aes(x=Year,y=pred.mean))+
   facet_wrap(~Study.Area.Name, ncol=1, scales="free" )+
   scale_x_continuous(breaks=min(diversity.transect$Year, na.rm=TRUE):max(diversity.transect$Year,na.rm=TRUE))+
   geom_text(data=diversity.slopes.avg, aes(x=min(diversity.transect$Year, na.rm=TRUE), y=max(diversity.avg$diversity, na.rm=TRUE)), 
             label=paste("Slope : ",round(diversity.slopes.avg$slope,2), 
                         " ( SE "  ,round(diversity.slopes.avg$slope.se,2),")",
                         " p :"    ,round(diversity.slopes.avg$p.value,3)),
                         hjust="left")
diversity.plot.summary.avg

ggsave(plot=diversity.plot.summary.avg, 
       file=paste(file.prefix,'-diversity-plot-summary-avg.png',sep=""),
       h=6, w=6, units="in", dpi=300)
# Follow methods of Collins we partition beta diversity into turnover and nestedness


# Convert long format to wide format for each plot/transect combination


veg.wide <- reshape2::dcast(veg.df, Study.Area.Name+Transect.Label+Plot+Year~Species, sum,
                            value.var="Foliar.cover....", fill=0)
# impute a zero for any missing percent covers
veg.wide[ is.na(veg.wide)] <- 0

# Compute the distances for turnover and nestedness for each transect-year combination comparing to the first 
# years data.
# Then do a regression these over time

dissim.transect <- ddply(veg.wide, c("Study.Area.Name","Transect.Label"), function (x){
    # Convert the species dissimage to presence/absence
    species.names <- names(x)
    species.names <- names(x)[ !names(x) %in% c("Study.Area.Name","Transect.Label","Plot","Year")]

    # Compute the dissimiarlity for each plot in subsequent years relative to the first year the
    # plot was measured. Then average these diversity measures over the plots within a transect
    # first convert to presence/absence data
    x[, species.names] <- as.numeric( x[,species.names] > 0)
    plot.level <- plyr::ddply(x, "Plot", function(x, species.names){
         first.year <- x[ which.min(x$Year),]
         diffs <- ddply(x, "Year", function(x,first.year,species.names){
            #browser()
            dissim <- betapart::beta.pair( rbind(first.year[, species.names,drop=FALSE],x[,species.names,drop=FALSE]))
            dissim <- unlist(dissim)
            data.frame(betatype=names(dissim), dissim=dissim)
         },first.year=first.year, species.names=species.names)
    }, species.names=species.names)
    #browser()
    # drop the first years
    first.year <- min(x$Year)
    plot.level <- plot.level[ plot.level$Year != first.year,]
    mean.dissim <- plyr::ddply(plot.level, c("Year","betatype"), plyr::summarize, mean.dissim = mean(dissim))
    mean.dissim
})
dissim.transect
##         Study.Area.Name Transect.Label Year betatype mean.dissim
## 1  Babine Mountain Park        Babine1 2017 beta.sim  0.30989621
## 2  Babine Mountain Park        Babine1 2017 beta.sne  0.07632134
## 3  Babine Mountain Park        Babine1 2017 beta.sor  0.38621756
## 4  Babine Mountain Park        Babine1 2021 beta.sim  0.27610501
## 5  Babine Mountain Park        Babine1 2021 beta.sne  0.10219526
## 6  Babine Mountain Park        Babine1 2021 beta.sor  0.37830026
## 7  Babine Mountain Park        Babine1 2024 beta.sim  0.28030597
## 8  Babine Mountain Park        Babine1 2024 beta.sne  0.07704662
## 9  Babine Mountain Park        Babine1 2024 beta.sor  0.35735259
## 10 Babine Mountain Park        Babine2 2017 beta.sim  0.38361111
## 11 Babine Mountain Park        Babine2 2017 beta.sne  0.07095504
## 12 Babine Mountain Park        Babine2 2017 beta.sor  0.45456616
## 13 Babine Mountain Park        Babine2 2021 beta.sim  0.29047619
## 14 Babine Mountain Park        Babine2 2021 beta.sne  0.09314670
## 15 Babine Mountain Park        Babine2 2021 beta.sor  0.38362289
## 16 Babine Mountain Park        Babine2 2024 beta.sim  0.35845238
## 17 Babine Mountain Park        Babine2 2024 beta.sne  0.04185225
## 18 Babine Mountain Park        Babine2 2024 beta.sor  0.40030463
# Make a preliminary plot of dissimilarity by years

prelim.dissim.plot <- ggplot(data=dissim.transect, aes(x=Year, y=mean.dissim, color=Transect.Label, linetype=Transect.Label))+
   ggtitle("Mean dissimiarity relative to first year")+
   ylab("Mean dissimilarity relative to first year")+
   geom_point(position=position_dodge(width=.2))+
   geom_line()+
   scale_x_continuous(breaks=min(dissim.transect$Year, na.rm=TRUE):max(dissim.transect$Year, na.rm=TRUE))+
   facet_wrap(~interaction(Study.Area.Name,betatype,sep=" "), ncol=2, scales="free_y")+
   theme(legend.position = c(1, 0), legend.justification = c(1, 0)) 
prelim.dissim.plot 

ggsave(plot=prelim.dissim.plot, 
       file=paste(file.prefix,'-dissim-plot-prelim.png',sep=""),
       h=6, w=6, units="in",dpi=300)

dissim.transect$YearF           <- factor(dissim.transect$Year)
dissim.transect$Transect.LabelF <- factor(dissim.transect$Transect.Label)
dissim.transect
##         Study.Area.Name Transect.Label Year betatype mean.dissim YearF
## 1  Babine Mountain Park        Babine1 2017 beta.sim  0.30989621  2017
## 2  Babine Mountain Park        Babine1 2017 beta.sne  0.07632134  2017
## 3  Babine Mountain Park        Babine1 2017 beta.sor  0.38621756  2017
## 4  Babine Mountain Park        Babine1 2021 beta.sim  0.27610501  2021
## 5  Babine Mountain Park        Babine1 2021 beta.sne  0.10219526  2021
## 6  Babine Mountain Park        Babine1 2021 beta.sor  0.37830026  2021
## 7  Babine Mountain Park        Babine1 2024 beta.sim  0.28030597  2024
## 8  Babine Mountain Park        Babine1 2024 beta.sne  0.07704662  2024
## 9  Babine Mountain Park        Babine1 2024 beta.sor  0.35735259  2024
## 10 Babine Mountain Park        Babine2 2017 beta.sim  0.38361111  2017
## 11 Babine Mountain Park        Babine2 2017 beta.sne  0.07095504  2017
## 12 Babine Mountain Park        Babine2 2017 beta.sor  0.45456616  2017
## 13 Babine Mountain Park        Babine2 2021 beta.sim  0.29047619  2021
## 14 Babine Mountain Park        Babine2 2021 beta.sne  0.09314670  2021
## 15 Babine Mountain Park        Babine2 2021 beta.sor  0.38362289  2021
## 16 Babine Mountain Park        Babine2 2024 beta.sim  0.35845238  2024
## 17 Babine Mountain Park        Babine2 2024 beta.sne  0.04185225  2024
## 18 Babine Mountain Park        Babine2 2024 beta.sor  0.40030463  2024
##    Transect.LabelF
## 1          Babine1
## 2          Babine1
## 3          Babine1
## 4          Babine1
## 5          Babine1
## 6          Babine1
## 7          Babine1
## 8          Babine1
## 9          Babine1
## 10         Babine2
## 11         Babine2
## 12         Babine2
## 13         Babine2
## 14         Babine2
## 15         Babine2
## 16         Babine2
## 17         Babine2
## 18         Babine2
# Fit a line through each dissimilarity measured
fits <- dlply(dissim.transect, c("betatype"), function(x){
   cat("\n\n\n*** Starting analysis on beta diversity type ", x$betatype[1],  "\n")
   dissim.transect <- x

   dissim.fit.lmer <-  lmerTest::lmer(mean.dissim ~ Year + (1|Transect.LabelF) + (1|YearF), data=dissim.transect)
   print(anova(dissim.fit.lmer,dfm='Kenward-Roger'))
   print(summary(dissim.fit.lmer))
   print(VarCorr(dissim.fit.lmer))
   list(Study.Area.Name =x$Study.Area.Name[1],
        betatype=x$betatype[1], 
        fit=dissim.fit.lmer)
})
## 
## 
## 
## *** Starting analysis on beta diversity type  1 
## Analysis of Variance Table of type III  with  Satterthwaite 
## approximation for degrees of freedom
##          Sum Sq    Mean Sq NumDF DenDF F.value Pr(>F)
## Year 0.00021447 0.00021447     1 1.204 0.33826 0.6508
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: mean.dissim ~ Year + (1 | Transect.LabelF) + (1 | YearF)
##    Data: dissim.transect
## 
## REML criterion at convergence: -8.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -0.92672 -0.43989 -0.00097  0.57087  0.75338 
## 
## Random effects:
##  Groups          Name        Variance Std.Dev.
##  YearF           (Intercept) 0.001197 0.03460 
##  Transect.LabelF (Intercept) 0.001324 0.03638 
##  Residual                    0.000634 0.02518 
## Number of obs: 6, groups:  YearF, 3; Transect.LabelF, 2
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)  9.524378  15.832041  1.204000   0.602    0.641
## Year        -0.004557   0.007835  1.204000  -0.582    0.651
## 
## Correlation of Fixed Effects:
##      (Intr)
## Year -1.000
##  Groups          Name        Std.Dev.
##  YearF           (Intercept) 0.034601
##  Transect.LabelF (Intercept) 0.036385
##  Residual                    0.025180
## 
## 
## 
## *** Starting analysis on beta diversity type  2 
## Analysis of Variance Table of type III  with  Satterthwaite 
## approximation for degrees of freedom
##          Sum Sq    Mean Sq NumDF  DenDF  F.value Pr(>F)
## Year 1.2096e-05 1.2096e-05     1 1.1771 0.091469  0.807
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: mean.dissim ~ Year + (1 | Transect.LabelF) + (1 | YearF)
##    Data: dissim.transect
## 
## REML criterion at convergence: -15.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.1453 -0.2266  0.1312  0.2488  0.9408 
## 
## Random effects:
##  Groups          Name        Variance  Std.Dev.
##  YearF           (Intercept) 6.179e-04 0.024858
##  Transect.LabelF (Intercept) 9.265e-05 0.009625
##  Residual                    1.322e-04 0.011500
## Number of obs: 6, groups:  YearF, 3; Transect.LabelF, 2
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)  3.295099  10.640787  1.177100   0.310    0.803
## Year        -0.001593   0.005266  1.177100  -0.302    0.807
## 
## Correlation of Fixed Effects:
##      (Intr)
## Year -1.000
##  Groups          Name        Std.Dev. 
##  YearF           (Intercept) 0.0248576
##  Transect.LabelF (Intercept) 0.0096254
##  Residual                    0.0114996
## 
## 
## 
## *** Starting analysis on beta diversity type  3 
## Analysis of Variance Table of type III  with  Satterthwaite 
## approximation for degrees of freedom
##         Sum Sq   Mean Sq NumDF  DenDF F.value Pr(>F)
## Year 0.0018656 0.0018656     1 3.0041  4.2045 0.1326
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: mean.dissim ~ Year + (1 | Transect.LabelF) + (1 | YearF)
##    Data: dissim.transect
## 
## REML criterion at convergence: -12.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.10868 -0.50069  0.06357  0.45002  1.09148 
## 
## Random effects:
##  Groups          Name        Variance  Std.Dev.
##  YearF           (Intercept) 0.0000000 0.00000 
##  Transect.LabelF (Intercept) 0.0006077 0.02465 
##  Residual                    0.0004437 0.02106 
## Number of obs: 6, groups:  YearF, 3; Transect.LabelF, 2
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)
## (Intercept) 12.819477   6.060063  3.004100   2.115    0.125
## Year        -0.006149   0.002999  3.004100  -2.050    0.133
## 
## Correlation of Fixed Effects:
##      (Intr)
## Year -1.000
##  Groups          Name        Std.Dev.
##  YearF           (Intercept) 0.000000
##  Transect.LabelF (Intercept) 0.024652
##  Residual                    0.021064
# Look at the residual plot 
plyr::l_ply(fits, function(x){
   cat("\n\n\n*** Generating diagnostic plots for beta diversity type ", x$betatype[1],  "\n")
   dissim.fit.lmer <- x$fit
   diag.plot <- sf.autoplot.lmer(dissim.fit.lmer)  # residual and other diagnostic plots
   plot(diag.plot)
   ggplot2::ggsave(plot=diag.plot, 
                file=paste(file.prefix,"-dissim-residual-lmer-plot-",x$betatype,".png",sep=""),
                h=6, w=6, units="in", dpi=300)
})
## 
## 
## 
## *** Generating diagnostic plots for beta diversity type  1
## `geom_smooth()` using method = 'loess'
## 
## 
## 
## *** Generating diagnostic plots for beta diversity type  2
## `geom_smooth()` using method = 'loess'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : Chernobyl! trL>n 6

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : Chernobyl! trL>n 6
## Warning in sqrt(sum.squares/one.delta): NaNs produced

## 
## 
## 
## *** Generating diagnostic plots for beta diversity type  3
## `geom_smooth()` using method = 'loess'

# check for autocorrelation - look at the average residual over time
plyr::l_ply(fits, function (x){
   cat("\n\n\n*** Testing for autocorrelation for beta diversity type ", x$betatype[1],  "\n")
   #browser()
   dissim.fit.lmer <- x$fit
   dissim.transect <- x$fit@frame
   dissim.transect$resid <- dissim.transect$mean.dissim - predict(dissim.fit.lmer, newdata=dissim.transect, re.form=~0)
   mean.resid <- plyr::ddply(dissim.transect, "Year", summarize, mean.resid=mean(resid))
   resid.fit <- lm( mean.resid ~ 1, data=mean.resid)
   dwres1 <- car::durbinWatsonTest(resid.fit)
   print(dwres1)
   dwres2 <- lmtest::dwtest(resid.fit)
   print(dwres2)
})
## 
## 
## 
## *** Testing for autocorrelation for beta diversity type  1 
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.6621622      2.986486   0.414
##  Alternative hypothesis: rho != 0
## 
##  Durbin-Watson test
## 
## data:  resid.fit
## DW = 2.9865, p-value = 0.9476
## alternative hypothesis: true autocorrelation is greater than 0
## 
## 
## 
## 
## *** Testing for autocorrelation for beta diversity type  2 
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.6621622      2.986486      NA
##  Alternative hypothesis: rho != 0
## 
##  Durbin-Watson test
## 
## data:  resid.fit
## DW = 2.9865, p-value = 0.9476
## alternative hypothesis: true autocorrelation is greater than 0
## 
## 
## 
## 
## *** Testing for autocorrelation for beta diversity type  3 
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.6621622      2.986486   0.446
##  Alternative hypothesis: rho != 0
## 
##  Durbin-Watson test
## 
## data:  resid.fit
## DW = 2.9865, p-value = 0.9476
## alternative hypothesis: true autocorrelation is greater than 0
# extract the slope

dissim.slopes <- ldply(fits, function(x){
  dissim.transect <- x$fit@frame # get the data
  dissim.fit.lmer <- x$fit
  #browser()
  data.frame(
       Study.Area.Name = x$Study.Area.Name,
       slope           = fixef(dissim.fit.lmer)["Year"],
       slope.se        = summary(dissim.fit.lmer)$coefficients["Year","Pr(>|t|)"],
       p.value         = summary(dissim.fit.lmer)$coefficients[row.names(summary(dissim.fit.lmer)$coefficients)=="Year"  ,"Pr(>|t|)"], 
       #r2             = summary(dissim.fit.lmer)$r.squared,  # not defined for mixed effect models
       stringsAsFactors=FALSE)
})
dissim.slopes
##   betatype      Study.Area.Name        slope  slope.se   p.value
## 1 beta.sim Babine Mountain Park -0.004556864 0.6507720 0.6507720
## 2 beta.sne Babine Mountain Park -0.001592633 0.8069726 0.8069726
## 3 beta.sor Babine Mountain Park -0.006149497 0.1325767 0.1325767
# compute the fitted values from the model
dissim.fitted <- plyr::ldply(fits, function(x){
   dissim.fit.lmer <- x$fit
   dissim.transect <- x$fit@frame
   dissim.fitted <- data.frame(
                 Study.Area.Name=x$Study.Area.Name[1],
                 Year=seq(min(dissim.transect$Year, na.rm=TRUE),max(dissim.transect$Year, na.rm=TRUE), .1),
                 stringsAsFactors=FALSE)
   dissim.fitted$pred.mean <- predict(dissim.fit.lmer, newdata=dissim.fitted,type="response", re.form=~0)
   dissim.fitted
})
head(dissim.fitted)
##   betatype      Study.Area.Name   Year pred.mean
## 1 beta.sim Babine Mountain Park 2017.0 0.3331830
## 2 beta.sim Babine Mountain Park 2017.1 0.3327273
## 3 beta.sim Babine Mountain Park 2017.2 0.3322716
## 4 beta.sim Babine Mountain Park 2017.3 0.3318159
## 5 beta.sim Babine Mountain Park 2017.4 0.3313602
## 6 beta.sim Babine Mountain Park 2017.5 0.3309045
# Plot with trend line 
dissim.plot.summary <- ggplot2::ggplot(data=dissim.transect,
                                    aes(x=Year, y=mean.dissim))+
   ggtitle("Mean dissimilarity relative to first year")+
   ylab("Mean disssimilarity relative to first year")+
   geom_point(size=3, aes(color=Transect.Label),position=position_dodge(w=0.2))+
   geom_line(data=dissim.fitted, aes(x=Year,y=pred.mean))+
   facet_wrap(~interaction(Study.Area.Name,betatype,sep=" "), ncol=2, scales="free_y")+
   theme(legend.position = c(1, 0), legend.justification = c(1, 0))+ 
   scale_x_continuous(breaks=min(dissim.transect$Year, na.rm=TRUE):max(dissim.transect$Year,na.rm=TRUE))+
   geom_text(data=dissim.slopes, aes(x=min(dissim.transect$Year, na.rm=TRUE), y=max(dissim.transect$mean.dissim, na.rm=TRUE)), 
             label=paste("Slope : ",round(dissim.slopes$slope,2), 
                         " ( SE "  ,round(dissim.slopes$slope.se,2),")",
                         " p :"    ,round(dissim.slopes$p.value,3)),
                         hjust="left")
dissim.plot.summary

ggsave(plot=dissim.plot.summary, 
       file=paste(file.prefix,'-dissim-plot-summary-lmer.png',sep=""),
       h=6, w=6, units="in", dpi=300)